# -*- encoding: utf-8 -*-
# @Author：lijinxi
# @Time ：2021/1/13 13:48
# @File：reformat.py

# Regenerate the data used for visualization: condense the alignment report
# into a CSV of gene pairs with their alignment length and score.


def parse_alignments(lines, group_size=30):
    """Extract gene-pair records from the lines of an alignment report.

    Only lines containing the ``[f]`` marker are used. Those lines are
    consumed in consecutive groups of ``group_size``; the first token of the
    first line in each group names the reference gene for the whole group.
    Lines whose own first token equals the reference gene (self-alignments)
    are dropped.

    Args:
        lines: Iterable of text lines (for example an open text file).
        group_size: Number of ``[f]`` lines that share one reference gene.

    Returns:
        A ``(msgs, scores)`` tuple. ``msgs`` holds CSV rows of the form
        ``"<ref>_<gene>,<align len>,<score>"`` and ``scores`` holds the
        matching scores as integers, in the same order.

    Raises:
        ValueError: If an ``[f]`` line has fewer than five fields, or its
            score field is not an integer.
    """
    msgs = []
    scores = []
    seen = 0  # number of '[f]' lines processed so far
    reference = ''
    for line in lines:
        if '[f]' not in line:
            continue
        # Columns are padded with runs of spaces, so drop the empty tokens
        # *before* reading any field; otherwise a leading space would make
        # the first field look empty.
        fields = [tok for tok in line.strip('\n').split(' ') if tok != '']
        if len(fields) < 5:
            raise ValueError('malformed alignment line: %r' % line)
        if seen % group_size == 0:
            reference = fields[0]
        # Count every '[f]' line so that a new reference gene is picked up
        # at the start of each group, not only for the very first line.
        seen += 1
        gene = fields[0]
        if gene == reference:
            continue  # skip the alignment of a gene against itself
        align_len = fields[-5].replace('(', '').replace(')', '')
        score = fields[-3]
        scores.append(int(score))
        msgs.append(reference + '_' + gene + ',' + align_len + ',' + score)
    return msgs, scores


def main():
    """Read the alignment report, print summary statistics, write the CSV.

    Prints the number of gene pairs and their mean score (``nan`` when no
    pair was found), then writes ``summary.csv`` with a header row.

    Returns:
        The ``(msgs, scores)`` tuple produced by ``parse_alignments``.
    """
    with open('alignments_accurate.txt', 'r', encoding='utf-8') as f:
        msgs, scores = parse_alignments(f)
    print(len(msgs))
    # Guard against an empty result instead of dividing by zero.
    print(sum(scores) / len(scores) if scores else float('nan'))

    with open('summary.csv', 'w', encoding='utf-8-sig') as f:
        f.write('two genes,align len,score' + '\n')
        f.writelines(msg + '\n' for msg in msgs)
    return msgs, scores


if __name__ == '__main__':
    # Keep the results available as module-level names, as before.
    msgs, scores = main()
